import bs4

# request_util.open_text_url(url)
# url = "https://www.lingbi.gov.cn/mllb/whly/lyjd/141335061.html"
# # stream = requests.get(url)
# soup = bs4.BeautifulSoup(request_util.open_text_url(url),"html.parser")
# print(*soup.select("img[src^='/lingbioldfiles']"),sep="\n")

# lb_government = "https://www.lingbi.gov.cn/"
# a = [lb_government+i["src"] for i in soup.select("img[src^='/lingbioldfiles']")]

# Print one row per entry of the ".doc_list" list in a locally saved HTML page:
# the text of the entry's first two <span> elements and the href of its first
# <a>, separated by tab characters.
with open(r'F:\Desktop\1.html', 'r', encoding='utf8') as f:
    text = f.read()
soup = bs4.BeautifulSoup(text, "html.parser")
lists = soup.select(".doc_list>li")  # <li> elements directly under .doc_list
for item in lists:
    spans = item.select("span")
    links = item.select("a")
    # Skip entries that do not have two spans and a link (this also covers
    # empty <li> elements) instead of failing with IndexError part-way
    # through the listing.
    if len(spans) < 2 or not links:
        continue
    # NOTE: .string may be None (e.g. when a span has several child nodes),
    # and .get("href") is None when the attribute is missing; both are
    # printed as "None".
    print(spans[0].string, "\t", spans[1].string, "\t", links[0].get("href"))


